class: center, middle, inverse, title-slide # Batch effects in scRNA-seq data ## Committee meeting ### Almut Lütge ### DMLS - University of Zürich ### 2021-03-02 --- class: inverse, center, middle # Batch effects --- class: inverse, center # Batch effects .pull-left[ <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/batch1.png" width="80%" height="80%" /> ] -- .pull-right[ <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/batch2.png" width="80%" height="80%" /> ] -- <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/batch3.png" width="60%" height="60%" /> --- class: center # Sinergia: Lung fibroblast from different patients .pull-left[ <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/sin_celltype.png" width="100%" height="100%" /> ] -- .pull-right[ <br> <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/sin_patient.png" width="87%" height="87%" /> ] --- class: center, middle, inverse <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/cellmixs_preprint.png" width="100%" height="100%" /> --- class: center, inverse # Project overview <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/Current_work_Benchmark_mixing_metrics_6.png" width="120%" height="120%" /> --- class: center, inverse # Project overview <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/Current_work_Benchmark_mixing_metrics_5.png" width="120%" height="120%" /> --- class: center, inverse # Project overview <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/Current_work_Benchmark_mixing_metrics_4.png" width="120%" height="120%" /> --- class: center, inverse # Project overview <img 
src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/Current_work_Benchmark_mixing_metrics_3.png" width="120%" height="120%" /> --- class: center, inverse # Project overview <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/Current_work_Benchmark_mixing_metrics_2.png" width="100%" height="100%" /> --- class: center, inverse # Project overview <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/Current_work_Benchmark_mixing_metrics_1.png" width="100%" height="100%" /> --- class: center, inverse # Project overview <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/Current_work_Benchmark_mixing_metrics.png" width="100%" height="100%" /> --- class: inverse, center, middle # Batch characterization --- # Batch characterization .pull-left[ + 7 datasets + 9 batch effects + *patient, protocol, storage* ] -- .pull-right[ + **Variance partitioning** + **logFC** distribution and correlation + DE genes/overlap ] -- ### Variance partition ### `\(Y_g = \mu + X_{p}\alpha_{pg} + X_{b}\beta_{bg} + X_{p:b} \gamma_{(p:b)g} + \epsilon_g\)` `\(Y_g\)`: normalized expression of gene g `\(\mu\)`: baseline expression `\(X_{p}\)`, `\(X_{b}\)`, `\(X_{p:b}\)`: design matrices for the (random) cell type, batch and interaction effects `\(\alpha_{pg} \sim N(0, \sigma^2_{pg})\)`,..: corresponding random effects `\(\epsilon_g \sim N(0, \sigma^2_{g})\)`: corresponding errors --- class: center # Percent variance explained by ..: <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/tern2.png" width="60%" height="60%" /> --- class: center # Log fold change distributions <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/logFC2.png" width="100%" height="100%" /> --- class: inverse, center, middle # Metrics --- class: center, inverse # Metrics <img 
src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/CellMixS_final/figures/metrics2.png" width="100%" height="100%" /> --- class: center, inverse # Metrics <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/CellMixS_final/figures/metrics1.png" width="100%" height="100%" /> --- class: center, inverse # Metrics <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/CellMixS_final/figures/metrics.png" width="100%" height="100%" /> --- class: center # Cell-specific Mixing Score (cms) -- .pull-left[ <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/CellMixS_final/figures/cms_tsne_X-1.png" width="100%" height="100%" /> ] -- .pull-right[ <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/CellMixS_final/figures/cms_dist_X-1.png" width="100%" height="100%" /> ] --- class: center # Cell-specific Mixing Score (cms) -- .pull-left[ <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/CellMixS_final/figures/cms_tsne_Y-1.png" width="100%" height="100%" /> ] -- .pull-right[ <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/CellMixS_final/figures/cms_dist_Y-1.png" width="100%" height="100%" /> ] --- class: center # Sinergia: Lung fibroblast from different patients <br> <br> .pull-left[ <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/sin_patient.png" width="80%" height="80%" /> ] -- .pull-right[ <br> <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/sin_cms.png" width="95%" height="95%" /> ] --- class: center # Sinergia: Lung fibroblast from different patients <br> <br> .pull-left[ <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/sin_harmony.png" width="80%" height="80%" /> ] -- .pull-right[ <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/sin_harmony_cms.png" width="100%" 
height="100%" /> ] --- class: inverse, center, middle # Benchmark --- class: center # Task 1: Batch characteristics <br> <br> <br> <br> <br> ### *Aim: Test whether metrics reflect batch strength across datasets* -- Spearman correlation of metrics with surrogates of batch strength (e.g., PVE-Batch and proportion of DE genes between batches) across datasets --- class: center # Batch characteristics <!-- --> --- class: center # Task 2: Batch label permutation <br> <br> <br> <br> <br> ### *Aim: Negative control and test whether metrics scale with randomness* -- Spearman correlation of metrics with the percentage of randomly permuted batch labels --- class: center # Batch label permutation
--- class: center # Batch label permutation <br> <!-- --> --- class: center # Task 3: Scaling and detection limits <br> <br> <br> ### *Aim: Test whether metrics scale with (synthetic) batch strength; estimate the lower limit of batch detection* -- Spearman correlation of metrics with the batch logFC in simulation series on the same dataset; minimal batch logFC that is recognized by the metrics as a batch effect --- class:
--- class: center # Scaling and Sensitivity <!-- --> --- class: center # Scaling and Sensitivity -- <br> .pull-left[ <!-- --> ] -- .pull-right[ <!-- --> ] --- class: center # Task 4: Imbalanced batches <br> <br> <br> ### *Aim: Reaction of metrics to imbalanced cell type abundance within the same dataset* -- Test sensitivity towards imbalance of cell type abundance --- class:
--- class: center # Imbalanced batch effects <br> <!-- --> --- class: center # Imbalanced batch effects <br> <br> <!-- --> --- class: center, inverse, middle # Summary --- class: center # Summary <!-- --> --- class: center, inverse, middle --- class: center, middle, inverse # 2. *Omni_batch*: open and continuous benchmarking of single cell batch correction methods --- class: center # State of the art <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/benchmarks.png" width="100%" height="100%" /> --- class: center # Benchmark A <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/res_bench1.png" width="80%" height="80%" /> --- class: center # Benchmark B <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/res_bench2.png" width="80%" height="80%" /> --- class: center # Omni-benchmark <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/omni_1.png" width="95%" height="95%" /> --- class: center # Omni-benchmark <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/omni_2.png" width="95%" height="95%" /> --- class: center # Omni-benchmark <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/omni_3.png" width="95%" height="95%" /> --- class: center # Omni-benchmark <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/omni_4.png" width="95%" height="95%" /> --- class: center # Omni-benchmark <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/omni_5.png" width="95%" height="95%" /> --- class: center # Omni-benchmark <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/omni_6.png" width="95%" height="95%" /> --- class: center, middle, inverse # 3. 
Teaching and course work --- class: # Teaching and course work <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/committee_2/figures/teaching.png" width="95%" height="95%" /> --- --- class: center, middle, inverse # Extra slides --- class: # Local Inverse Simpson Index (lisi) - neighborhood diversity -- - effective number of batches -- - neighbor weighting: + Euclidean distance --> wisi + no weighting --> isi + Gaussian kernel-based weighting --> lisi `$$\frac{1}{\sum_{b=1}^{B} p(b)^2}$$` --- class: center # k-nearest neighbour batch effect test (kBET) <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/CellMixS_final/figures/kbet.png" width="100%" height="100%" /> (Büttner et al., 2019) --- class: center # Mixing metric (mm) <img src="data:image/png;base64,#/home/almut/Dokumente/Phd/presentation/CellMixS_final/figures/mm.png" width="100%" height="100%" /> ---